home *** CD-ROM | disk | FTP | other *** search
- /* WIDE AREA INFORMATION SERVER SOFTWARE
- No guarantees or restrictions. See the readme file for the full standard
- disclaimer.
- 5.29.90 Harry Morris, morris@think.com
- */
-
- /* this file is a server process for a unix machine that takes input from
- standard in or from a socket and searches the local search engine on the
- unix box.
- originally written by harry morris.
- modified by brewster kahle. 7/90
- 6.xx.90 Brewster - initial implementation of stdio interface
- 7.xx.90 Patrick Bray - support for headers and forking processes
- 90.07.31 Ephraim - support for logging
-
- 91.03.03 Jonathan - set searchLog to log_out.
- 91.05.23 Jonathan - added fork process for indexer.
- Fixed version display so it exits.
- 91.05.25 Jonathan - added setuid.
-
- Tue Jul 9 12:11:02 1991 -- Michael Haberler mah@wu-wien.ac.at
-
- Added semi-intelligent INFO database indexing (only done if
- any of the .src files is newer than INFO.dct)
-
- Locking against multiple concurrent INFO rebuilds if
- running under inetd
-
- Use scandir() for directory operations
-
- Works under inetd as well as standalone. Here are my inetd.conf
- entries (not the missing userid in the Ultrix inetd.conf!):
-
- hpux 7.0/800, Interactive/386 2.2.1:
- z3950 stream tcp nowait root /usr/local/etc/waisserver waisserver -s \
- -d /usr/logins/mah/wais-sources
-
- Ultrix 4.1:
- z3950 stream tcp nowait /usr/local/etc/waisserver waisserver -s \
- -d /usr/logins/mah/wais-sources
-
- Also, add the next line to /etc/services, and tickle your YP server:
- z3950 210/tcp # wide area information server (wais)
-
- * $Log: server-single.c,v $
- * Revision 1.8 92/05/10 14:49:06 jonathan
- * Updated for release
- *
- * Revision 1.7 92/03/05 07:08:23 shen
- * add two more dummy arguments to call to init_search_engine
- *
- * Revision 1.6 92/02/16 12:32:42 jonathan
- * Removed all code refering to use_stdio, since it's not a valid choice in
- * this server.
- *
- * Revision 1.5 92/02/12 13:42:04 jonathan
- * Changed server date.
- *
- * Revision 1.4 92/02/12 13:41:18 jonathan
- * Added "$Log" so RCS will put the log message in the header
- *
- */
-
- #define SERVER_DATE "Sun May 10 1992"
-
- #ifndef lint
- static char *RCSid = "$Header: /tmp_mnt/net/quake/proj/wais/wais-8-b5/ir/RCS/server-single.c,v 1.8 92/05/10 14:49:06 jonathan Exp $";
- #endif
-
- #define INFO_DICT "INFO.dct"
- #define LOCKFILE "/tmp/INFO.lock" /* while re-indexing INFO */
- #define NAPTIME 1 /* seconds */
- #define MAXNAPTIME 60 /* wait up to a minute for indexer to finish */
-
- #include "server.h"
- #include "sockets.h"
- #include <sys/types.h>
- #include <sys/stat.h>
- #ifdef USG
- #include <sys/fcntl.h>
- #else
- #include <sys/file.h>
- #endif
- #ifdef SYSV
- #define SIGCHLD SIGCLD
- #endif
- #include <signal.h>
- #include <string.h>
- #include "irdirent.h"
- #include "panic.h"
- #include "ustubs.h"
- #include "transprt.h"
- #include "wmessage.h"
- #include "ir.h"
- #include "wprot.h"
- #include "cutil.h"
- #include "futil.h"
- #include "irext.h"
-
- /* to create the INFO index */
- #include "irtfiles.h"
- #include "irfiles.h"
- #include "irhash.h"
- #include "version.h"
-
- /* for address decoding */
-
- #ifndef ultrix
- #include <sys/socket.h>
- #endif
- #include <netinet/in.h>
- #ifndef ultrix
- #include <netdb.h>
- #endif
- #include <arpa/inet.h>
-
- #include <setjmp.h>
-
- static jmp_buf jbuf;
-
- static long bufferSize = BUFSZ; /* how much we are using
- (we get one of these per process) */
-
- static FILE *current_client;
- static struct itimerval new, old;
-
- char *log_file_name = NULL;
-
- FILE *logfile; /* the logfile */
-
- /*---------------------------------------------------------------------------*/
- #define TIMEOUT_LENGTH 5 /* 5 second timeout.
- Since this is only in the handle - client code,
- I don't want it to wait too long.
- The bytes should come quickly
- */
- #define IDLE_TIME "5 seconds"
-
- long
- handle_client(in,out, index_directory)
- FILE* in;
- FILE* out;
- char *index_directory;
- {
- char buf[BUFSZ]; /* contains the message and header */
- char *bufPtr ; /* points at the begining of the z3950 */
- long size; /* bytes in the z3950 message */
- WAISMessage header; /* for storing the header */
- long i;
- long bytesLeft;
- long nextChar;
- int jmpres;
-
- new.it_interval.tv_sec = 0;
- new.it_interval.tv_usec = 0;
- new.it_value.tv_sec = TIMEOUT_LENGTH;
- new.it_value.tv_usec = 0;
-
- if ((jmpres = setjmp(jbuf)) != 0) {
- if (jmpres == 2) return EOF; /* because we're here by timeout */
- } else {
- /* try to read the header */
- for (i = 0; i < HEADER_LENGTH; i++)
- {
- setitimer(ITIMER_REAL, &new, NULL);
- nextChar = fgetc(in);
- if (nextChar == EOF) /* my connection exited, so will I */
- {
- setitimer(ITIMER_REAL, &old, NULL);
- return EOF;
- }
- else
- buf[i] = (char)nextChar;
- }
- setitimer(ITIMER_REAL, &old, NULL);
- /* parse the header */
- readWAISPacketHeader(buf,&header);
-
- /* make sure we have the right version.
- If we dont, we dont know what to do. */
- if (header.hdr_vers > HEADER_VERSION)
- panic("Incompatable header versions (Current version: %d, supplied version: %d.",
- HEADER_VERSION, header.hdr_vers) ;
-
- /* determine the size of the z3950 message */
- {
- char length_array[11];
- strncpy(length_array, header.msg_len, 10);
- length_array[10] = '\0';
- size = atol(length_array);
- }
-
- /* set bufPtr to start the z3950 message */
- bufPtr = buf + HEADER_LENGTH ;
-
- /* read the z3950 message */
- for (i = 0; i < size ; i++) {
- setitimer(ITIMER_REAL, &new, NULL);
- if ((buf[i + HEADER_LENGTH] = (char)fgetc(in)) == EOF)
- return -1;
- }
-
- setitimer(ITIMER_REAL, &old, NULL);
- rewind(in);
-
- /* decode the z3950 if necessary */
- transportDecode((long)header.encoding,bufPtr,&size);
-
- /* XXX handle compression options */
-
- /* process it the z3950 */
- bytesLeft = bufferSize;
-
- size = interpret_buffer(bufPtr,size,bufPtr,bytesLeft,
- &bufferSize,(long)header.hdr_vers,
- index_directory);
-
- /* re-encode the message if necessary */
- transportCode((long)header.encoding,bufPtr,&size);
-
- /* XXX handle compression options */
-
- /* write the new header */
- writeWAISPacketHeader(buf,size,
- (long)header.msg_type,header.server,
- (long)header.compression,(long)header.encoding,
- (long)header.hdr_vers);
-
- /* write the whole response to the output file */
- for (i = 0; i < size + HEADER_LENGTH; i++)
- fputc(buf[i],out) ;
-
- fflush(out); /* flush any file buffers */
- rewind(out);
-
- return 0;
- }
- }
-
- /*---------------------------------------------------------------------------*/
-
- #ifndef ISC
- static void breakKey _AP((long s1,long s2,struct sigcontext* s3,char* s4));
- #endif
-
- static void
- breakKey (s1,s2,s3,s4)
- long s1;
- long s2;
- struct sigcontext *s3;
- char *s4;
- {
- waislog(WLOG_HIGH, WLOG_ERROR, "got a ^c");
- exit (-1);
- }
-
- /*---------------------------------------------------------------------------*/
-
- void
- childhandler(sig, code, scp, addr)
- long sig, code;
- struct sigcontext *scp;
- char *addr;
- {
- wait(NULL); /* give the kid a decent burial */
- }
-
- /*---------------------------------------------------------------------------*/
-
- void
- server_alarmhandler(sig, code, scp, addr)
- long sig, code;
- struct sigcontext *scp;
- char *addr;
- {
- setitimer(ITIMER_REAL, &old, NULL);
- if(current_client != NULL) {
- waislog(WLOG_HIGH, WLOG_CLOSE,
- "Client idle longer %s during Read - Closing client connection.", IDLE_TIME);
- longjmp(jbuf, 2);
- }
- else {
- waislog(WLOG_HIGH, WLOG_ERROR,
- "Bogus timer signal. What's the deal?");
- }
- }
-
- /*---------------------------------------------------------------------------*/
-
- void
- seghandler(sig, code, scp, addr)
- long sig, code;
- struct sigcontext *scp;
- char *addr;
- {
- waislog(WLOG_HIGH, WLOG_CLOSE,
- "Segmentation violation. Bummer. Closing server and exiting.");
- exit(0);
- }
-
- /*---------------------------------------------------------------------------*/
-
- void
- bushandler(sig, code, scp, addr)
- long sig, code;
- struct sigcontext *scp;
- char *addr;
- {
- waislog(WLOG_HIGH, WLOG_CLOSE,
- "Bus error. Bummer. Closing server and exiting.");
- exit(0);
- }
-
- /*---------------------------------------------------------------------------*/
-
- #include <pwd.h>
-
- int finduid(name)
- char *name;
- {
- struct passwd *pwent;
-
- if ((pwent = getpwnam(name)) == NULL) {
- return -1;
- }
-
- return(pwent->pw_uid);
- }
-
- static char *index_dir = NULL;
- static time_t info_change_time;
- static int indexing_needed = 0;
- static char *info_dict = INFO_DICT;
-
- extern int alphasort();
-
- /* selecttion function for scandir()
- * trigger on ".src" extension, regular file, and != "INFO.src"
- * Indexing is needed if any of the .src files is younger than
- * INFO.dct
- */
- static int
- srcfiles(e)
- struct dirent *e;
- {
- struct stat sb;
- char *lastdot = strrchr(e->d_name,'.');
- int candidate;
-
- candidate = lastdot &&
- (stat(merge_pathnames(e->d_name,index_dir), &sb) >= 0) &&
- ((sb.st_mode & S_IFMT) == S_IFREG) &&
- !strcmp(lastdot,source_ext) &&
- strcmp(e->d_name,info_dict); /* whew */
-
- if (candidate) {
- indexing_needed |= (sb.st_mtime > info_change_time);
- return 1;
- }
- return 0;
- }
-
-
- /*---------------------------------------------------------------------------*/
-
- #include <sys/types.h>
- #include <sys/time.h>
-
- typedef struct _client_connection {
- FILE *file;
- long buffersize;
- long pid;
- long line;
- } client_connection, *Client_connection;
-
- void
- main(argc,argv)
- int argc;
- char* argv[];
- {
- fd_set fds;
- FILE *file;
- client_connection clients[200];
- int numclients, i;
- long socket;
- char *next_argument = next_arg(&argc, &argv), *command_name;
- long tcp_port = 210; /* tcp_port to use */
- /* char *log_file_name = NULL; */ /* name of file for error output */
- int child_proc; /* for the child process id */
- char *uid_name = "root"; /* user id so setuid if root */
- int uid = 0; /* if not specified, leave as root. */
- long cm_mem_percent = 0; /* default */
- int child,lockfd;
- struct stat statbuf;
- struct dirent **list;
- int n_files,fd;
- int naptime = 0;
- extern int errno;
- extern char *sys_errlist[];
- char host_name[255];
- static long current_id = 1, current_log_line = 0;
-
- command_name = next_argument;
- host_name[0] = 0;
-
- getitimer(ITIMER_REAL, &old);
- for(i = 0; i < 200; i++)
- clients[i].file = NULL;
-
- if (argc == 0){
- printf("Usage: %s [-p [port_number]] [-d directory] [-u user] [-v] [-cmmem percent]\n",
- command_name);
- printf(" -p [port] listen to the port. If the port is supplied, then\n");
- printf(" that tcp_port number is used. If it is not supplied \n");
- printf(" then the Z39.50 port (210) is used.\n");
- printf(" -d directory: means to use the directory as the source of databases.\n");
- printf(" Defaults to the current directory.\n");
- printf(" -e [file]: set log output to file, or /dev/null if not specified.\n");
- printf(" -u user: if started as root, setuid to user after startup.\n");
- printf(" -cmmem number: percentage of CM memory to use (CM code only).\n");
- printf(" -v prints the version.\n");
- exit(1);
- }
- if(NULL == (next_argument = next_arg(&argc, &argv))){
- printf("No arguments specified\n");
- exit(0);
- }
- while((next_argument != NULL) &&
- ('-' == next_argument[0])){
- /* then we have an argument to process */
- if (0 == strcmp("-p", next_argument)){
- char *peek_argument = peek_arg(&argc, &argv);
- if ((NULL != peek_argument) && /* if we are not out of args */
- ('-' != peek_argument[0])){ { /* and the next isn't an option... */
- /* get the port number */
- tcp_port = atoi(next_arg(&argc, &argv));
- } /* end if (explicit tcp_port) */
- }
- } /* end if (-p) */
-
- else if (0 == strcmp("-e", next_argument)) {
- char *peek_argument = peek_arg(&argc, &argv);
- log_file_name = "/dev/null"; /* default to /dev/null */
- if ((peek_argument != NULL) &&
- ('-' != peek_argument[0])) {
- log_file_name = next_arg(&argc, &argv);
- } /* end if (explicit log file) */
- } /* end if (-e) */
- else if (0 == strcmp("-d", next_argument)) {
- index_dir = next_arg(&argc, &argv);
- }
- else if (0 == strcmp("-v", next_argument)) {
- printf("%s: %s, %s\n", command_name, VERSION, SERVER_DATE);
- }
- else if (0 == strcmp("-u", next_argument)) {
- uid_name = next_arg(&argc, &argv);
- if((uid = finduid(uid_name)) < 0)
- panic("Couldn't find user %s.", uid_name);
- }
- else if(0 == strcmp("-cmmem", next_argument)){
- if(NULL == (next_argument = next_arg(&argc, &argv)))
- panic("Expected a number (1-100) for percentage of memory to use");
- cm_mem_percent = atol(next_argument);
- if(cm_mem_percent < 1)
- panic("The -cmmem argument should not be less than 1 and less than 100");
- if(cm_mem_percent > 100)
- panic("Warning: The -cmmem parameter was %ld%%. It should be between 1-100.", cm_mem_percent);
- }
- else{
- panic("Don't recognize the %s option", next_argument);
- }
- next_argument = next_arg(&argc, &argv);
- } /* end while (more arguments) */
-
- if (log_file_name == NULL) {
- log_file_name = "/dev/null";
- logfile = stderr;
- }
- else logfile = NULL;
-
- if(0 != init_search_engine(index_dir, false, true, cm_mem_percent,0,0))
- panic("unable to initialize search engine");
-
- #ifdef AUTO_INDEX
-
- index_dir = index_dir ? index_dir : ".";
- info_dict = s_strdup(merge_pathnames(info_dict,index_dir));
-
- /* remember timestamp on INFO.dct if rebuilding needed
- * If it doesnt exist, it's assumed to be *very* old, to force
- * re-indexing
- */
- info_change_time = (stat(info_dict,&statbuf) == -1) ? 0 : statbuf.st_mtime;
-
- /* compare with candidates */
-
- if ((n_files = scandir(index_dir, &list, srcfiles, alphasort)) < 0) {
- waislog(WLOG_HIGH, WLOG_ERROR,
- "Error: reading directory %s, %s",
- index_dir, sys_errlist[errno]);
- exit(1);
- }
-
- /* ok. we know if we need indexing,
- * and have all the filenames.
- */
-
- if ((info_change_time == 0) || indexing_needed) {
-
- /* Time to re-index,
- * aquire the lock
- */
- waislog(WLOG_MEDIUM, WLOG_INDEX,
- "re-indexing needed, info_change_time=%d",info_change_time);
-
- if (( fd = open(LOCKFILE, O_WRONLY|O_CREAT|O_EXCL,0666)) == -1) {
-
- /* already locked by somebody else
- * spin till she finishes
- */
- while (!(stat(LOCKFILE,&statbuf) == -1)) {
- sleep(NAPTIME);
- naptime += NAPTIME;
- waislog(WLOG_MEDIUM, WLOG_INFO,
- "INFO locked, waiting since %d seconds", naptime);
- if (naptime > MAXNAPTIME) {
- waislog(WLOG_HIGH, WLOG_ERROR,
- "Warning - lockfile %s wont go away after %d seconds, exiting",
- LOCKFILE, naptime);
- exit(1); /* XXX be more perseverant */
- }
- }
- /* lockfile went away, assume INFO.* build finished
- * so just use it
- */
- } else { /* we aquired the lock, so rebuild database */
-
- if (!(child = fork())) {
- database *db;
- struct dirent **s = list;
- char filename[MAX_FILENAME_LEN];
-
- waislog(WLOG_MEDIUM, WLOG_INDEX,
- "Creating INFO database, pid=%d",getpid());
- db = openDatabase(merge_pathnames("INFO", index_dir),
- true, /* maybe this should append XXX */
- false);
- db->the_word_memory_hashtable =
- init_word_memory_hashtable(1L<<16, 100000, db->the_word_memory_hashtable);
-
- while (*s) { /* index it */
- strncpy(filename, index_dir, MAX_FILENAME_LEN);
- if(index_dir[strlen(index_dir) -1] != '/')
- strncat(filename, "/", MAX_FILENAME_LEN);
- strncat(filename, (*s)->d_name, MAX_FILENAME_LEN);
- waislog(WLOG_MEDIUM, WLOG_INDEX,
- "Indexing %s", filename);
- index_text_file(filename, NULL, NULL, NULL,
- NULL, "WSRC", db, true, false);
- s++;
- }
- freedir(list); /* array of filenames */
-
- if(!probe_file(source_filename(filename, db)))
- write_src_structure(source_filename(filename, db),
- "INFO", "WSRC", NULL, 0, true, tcp_port);
- finished_add_word(db);
- closeDatabase(db);
- if (unlink(LOCKFILE))
- panic("Indexer: cant unlink lockfile!\n");
- waislog(WLOG_MEDIUM, WLOG_INDEX,
- "Indexer pid=%d done", getpid());
-
- exit(0); /* indexing child */
-
- } else if (child == -1) {
- waislog(WLOG_HIGH, WLOG_ERROR,
- "Unable to fork for indexer.");
- exit(1);
- }
- /* wait for child process */
- else while (wait(0) != child) ; /* do nothing */
- }
- }
-
- #endif /* AUTO_INDEX */
-
- waislog(WLOG_MEDIUM, WLOG_INFO, "Running server %s", VERSION);
-
- signal(SIGINT, breakKey);
-
- signal(SIGCHLD, childhandler); /* XXX dont really need this any more */
- signal(SIGALRM, server_alarmhandler);
-
- signal(SIGSEGV, seghandler);
-
- signal(SIGBUS, bushandler);
-
- open_server(tcp_port,&socket,BUFSZ);
-
- #ifdef SECURE_SERVER
- /* if root, setuid to user specified id. */
- if (uid > 0 && getuid() == 0) {
- waislog(WLOG_MEDIUM, WLOG_INFO,
- "Setting uid to %s.", uid_name);
- if ( 0 > setuid(uid)) {
- waislog(WLOG_HIGH, WLOG_ERROR,
- "Unable to setuid to %s! Exiting.", uid_name);
- exit(-1);
- }
- }
- #endif
- while (TRUE) { /* be a server for several connections */
- int active, width = ulimit();
-
- FD_ZERO(&fds);
- FD_SET(socket, &fds);
- wais_pid = 0;
- for (i = 0; i < 200; i++) {
- if (clients[i].file != NULL)
- FD_SET(fileno(clients[i].file), &fds);
- }
- if((active = select(width, &fds, NULL, NULL, NULL)) < 1) {
- perror ("Select: ");
- waislog(WLOG_HIGH, WLOG_ERROR,
- "select returned an error!");
- }
- else {
- /* this is a connection on the socket. */
- if (FD_ISSET(socket, &fds)) {
- for(i = 0; i < 200; i++) {
- if (clients[i].file == NULL) {
- current_log_line++;
- accept_client_connection(socket, &clients[i].file);
- current_client = clients[i].file;
- clients[i].buffersize = BUFSZ;
- wais_pid = clients[i].pid = current_id++;
- log_line = clients[i].line = 0;
- if (handle_client(current_client, current_client, index_dir) == -1) {
- close_client_connection(current_client);
- current_client = clients[i].file = NULL;
- waislog(WLOG_MEDIUM, WLOG_CLOSE,
- "Done handling client");
- log_line = current_log_line;
- }
- else {
- setitimer(ITIMER_REAL, &old, NULL);
- clients[i].buffersize = bufferSize;
- clients[i].line = log_line;
- log_line = current_log_line;
- wais_pid = 0;
- }
-
- break;
- }
- }
- }
- /* this is for an established connection */
-
- for(i = 0; i < 200; i++) {
- if(clients[i].file != NULL && FD_ISSET(fileno(clients[i].file), &fds)) {
- current_client = clients[i].file;
- bufferSize = clients[i].buffersize;
- wais_pid = clients[i].pid;
- log_line = clients[i].line;
- if (handle_client(current_client, current_client, index_dir) == -1) {
- close_client_connection(current_client);
- current_client = clients[i].file = NULL;
- waislog(WLOG_MEDIUM, WLOG_CLOSE,
- "Done handling client");
- wais_pid = 0;
- log_line = current_log_line;
- }
- else setitimer(ITIMER_REAL, &old, NULL);
- clients[i].line = log_line;
- }
- }
- }
- }
- }
-
- /*---------------------------------------------------------------------------*/
-
-